This project looks at using Hidden Markov Models to identify Federer’s shot intentions during the Australian Open, 2017.
Using two hidden states, shot intentions can be categorized as ‘return’, meaning the shot was just meant to be returned to the opponent, and ‘winner’ or ‘aggressive’, meaning that the intention of the shot was to win the point with that shot.
The depmixS4 package includes the function depmix, which is used for estimation of this particular hidden Markov model.
library(devtools)
load("/Users/Jeremy/Documents/Winter Project/intentions/data/federer2017.RData")
library(ggplot2)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
Adding new variables that are manipulations of the information contained in the dataset.
# Take a working copy of the raw shot data and sort it chronologically so that
# consecutive rows are consecutive shots (match > set > game > point > shot).
fed_df <- federer2017
fed_df <- arrange(fed_df, matchid, set, game, point, shot)

# Reorient shots so that all of Federer's shots are hit from the negative-x
# side of the court and all opponents' shots from the positive-x side.
# A shot hit from the "wrong" side has the x AND y coordinates of its start,
# ballmark and final positions negated.
coord_cols <- c("start.x", "start.y", "ballmark.x", "ballmark.y",
                "final.x", "final.y")

# Federer: flip shots that start at x >= 0.
# The mask is computed once, before start.x itself is modified.
flip_fed <- fed_df$impact.player == "FEDERER" & fed_df$start.x >= 0
for (coord in coord_cols) {
  # Rows that are not flipped keep their OWN value. (Previously final.x and
  # final.y were overwritten with ballmark.x / ballmark.y for unflipped rows.)
  fed_df[[coord]] <- ifelse(flip_fed, -fed_df[[coord]], fed_df[[coord]])
}

# Opponents: flip shots that start at x <= 0.
flip_opp <- fed_df$impact.player != "FEDERER" & fed_df$start.x <= 0
for (coord in coord_cols) {
  fed_df[[coord]] <- ifelse(flip_opp, -fed_df[[coord]], fed_df[[coord]])
}

# The original pipeline left fed_df as a rowwise data frame; keep that so the
# steps that follow see the same kind of object.
fed_df <- rowwise(fed_df)
# Derived shot-level variables. All indicators are coded 1 = yes, 0 = no.
fed_df <- fed_df %>%
  mutate(
    # shot is the last one of its point
    lastshot = ifelse(shot == final.shot, 1, 0),
    # shot was hit by the player who served the point
    isserver = ifelse(server == impact.player, 1, 0),
    # hitpoint code "F" (forehand, per the variable name)
    fhand = ifelse(hitpoint == "F", 1, 0),
    # name of the player on the other side of the net from Federer
    opponent = ifelse(server == "FEDERER", receiver, server),
    # last shot of a point that did not end in an error
    winner = ifelse(ended.in.error == FALSE & lastshot == 1, 1, 0),
    # shot 2 of a point is the return of serve; split by serve.classification
    retser = ifelse(shot == 2, 1, 0),
    retser1 = ifelse(shot == 2 & serve.classification == 1, 1, 0),
    retser2 = ifelse(shot == 2 & serve.classification == 2, 1, 0)
  )
# Convert every character column to a factor.
# Columns are extracted with [[ ]] so that the test sees the column vector
# itself: on a tibble / rowwise data frame, fed_df[, i] is a one-column data
# frame, for which is.character() is always FALSE (the loop was a no-op).
for (col_name in names(fed_df)) {
  if (is.character(fed_df[[col_name]])) {
    fed_df[[col_name]] <- as.factor(fed_df[[col_name]])
  }
}
# Decode the score from the point id, one character per score component.
library(stringi)
ids <- fed_df$id
# Split each id into its individual characters and arrange the result as a
# matrix with one row per shot and one column per character position.
# NOTE(review): this assumes every id has the same number of characters and
# that each score component is a single character - confirm against the id format.
split.ids <- t(data.frame(stri_split_boundaries(ids, type="character")))
# Character positions 1-6 are read as server/receiver points, games and sets.
# The new columns hold the raw characters; they are converted with as.numeric() later.
fed_df <- fed_df %>%
plyr::mutate(server.points = split.ids[,1], receiver.points = split.ids[,2], server.games = split.ids[,3], receiver.games = split.ids[,4], server.sets = split.ids[,5], receiver.sets = split.ids[,6])
# Re-express the server/receiver score as Federer/opponent score, then take
# the score differences from Federer's point of view.
fed_df <- fed_df %>%
  rowwise() %>%
  mutate(
    # Federer's score: the server's when he serves, otherwise the receiver's
    fed.points = ifelse(server == "FEDERER", as.numeric(server.points), as.numeric(receiver.points)),
    fed.games = ifelse(server == "FEDERER", as.numeric(server.games), as.numeric(receiver.games)),
    fed.sets = ifelse(server == "FEDERER", as.numeric(server.sets), as.numeric(receiver.sets)),
    # Opponent's score: the mirror image
    opp.points = ifelse(server == "FEDERER", as.numeric(receiver.points), as.numeric(server.points)),
    opp.games = ifelse(server == "FEDERER", as.numeric(receiver.games), as.numeric(server.games)),
    opp.sets = ifelse(server == "FEDERER", as.numeric(receiver.sets), as.numeric(server.sets)),
    # Positive values mean Federer is ahead
    sets.diff = fed.sets - opp.sets,
    games.diff = fed.games - opp.games,
    points.diff = fed.points - opp.points
  )
# Features describing the shot hit immediately before the current one
# (the row above, since rows are sorted by shot within point).
# They stay NA for serves (shot == 1), which have no preceding shot.
fed_df <- fed_df %>%
  mutate(
    speed.diff = NA,
    oppo.hit.x = NA,
    oppo.hit.y = NA,
    oppo.hit.z = NA,
    oppo.speed = NA,
    oppo.ballmark.x = NA,
    oppo.ballmark.y = NA,
    speed.ratio = NA
  )

# Rows to fill: every non-serve except row 1, which has no row above it.
# Indexing whole vectors replaces the row-by-row loop and is safe for data
# with fewer than two rows.
rally_rows <- which(fed_df$shot != 1)
rally_rows <- rally_rows[rally_rows >= 2]
prev_rows <- rally_rows - 1

# speed difference: current impact speed minus the previous shot's
fed_df$speed.diff[rally_rows] <-
  fed_df$speed1[rally_rows] - fed_df$speed1[prev_rows]

# where, and how fast, the previous shot was hit
fed_df$oppo.hit.x[rally_rows] <- fed_df$start.x[prev_rows]
fed_df$oppo.hit.y[rally_rows] <- fed_df$start.y[prev_rows]
fed_df$oppo.hit.z[rally_rows] <- fed_df$start.z[prev_rows]
fed_df$oppo.speed[rally_rows] <- fed_df$speed1[prev_rows]

# where the previous shot landed
fed_df$oppo.ballmark.x[rally_rows] <- fed_df$ballmark.x[prev_rows]
fed_df$oppo.ballmark.y[rally_rows] <- fed_df$ballmark.y[prev_rows]
# Speed ratio and distance of the previous shot's ballmark from the lines.
# NOTE(review): 4.115 and 11.89 look like half the singles-court width and
# half the court length in metres - confirm the units of the coordinates.
fed_df <- fed_df %>%
  mutate(
    # speed ratio: this shot's speed relative to the previous shot's
    speed.ratio = speed1 / oppo.speed,
    # distance of oppo.ballmark from the sideline
    side.dist = 4.115 - abs(oppo.ballmark.y),
    # distance of oppo.ballmark from the baseline
    base.dist = 11.89 - abs(oppo.ballmark.x),
    # shortest distance from any line; pmin() is element-wise, so the result
    # is per shot whether or not fed_df is still a rowwise data frame
    # (min() would collapse the whole column to one value on a plain frame)
    short.dist = pmin(side.dist, base.dist)
  )
# Adding angles
# p.start.x / p.start.y: where the same player hit their own previous shot,
# i.e. the start position two rows back. Left NA when the row two back was
# hit by someone else or is not exactly two shots earlier.
fed_df <- fed_df %>%
  mutate(
    p.start.x = NA,
    p.start.y = NA
  )

# Candidate rows are 3..n; the negative index yields an empty set when the
# data has fewer than three rows (3:nrow() would count backwards instead).
cand_rows <- seq_len(nrow(fed_df))[-(1:2)]
same_rally <-
  fed_df$impact.player[cand_rows] == fed_df$impact.player[cand_rows - 2] &
  fed_df$shot[cand_rows] == fed_df$shot[cand_rows - 2] + 2
# which() drops rows where the comparison is NA
hit_rows <- cand_rows[which(same_rally)]

fed_df$p.start.x[hit_rows] <- fed_df$start.x[hit_rows - 2]
fed_df$p.start.y[hit_rows] <- fed_df$start.y[hit_rows - 2]
# Angle (degrees, folded into [0, 90]) between the vector from point 2 to
# point 1 and the vector from point 3 to point 2. All arguments are numeric
# vectors of equal length; the result has the same length and is NA/NaN
# wherever an input is missing or two of the points coincide.
fold_shot_angle <- function(x1, y1, x2, y2, x3, y3) {
  ux <- x1 - x2
  uy <- y1 - y2
  vx <- x2 - x3
  vy <- y2 - y3
  u_len <- sqrt(ux^2 + uy^2)
  v_len <- sqrt(vx^2 + vy^2)
  # dot product of the two unit vectors
  cos_theta <- (ux / u_len) * (vx / v_len) + (uy / u_len) * (vy / v_len)
  # rounding can push the cosine just outside [-1, 1], where acos() is NaN
  cos_theta <- pmax(pmin(cos_theta, 1), -1)
  theta <- acos(cos_theta) * 180 / pi
  ifelse(theta > 90, 180 - theta, theta)
}

# o.angle: angle between the fed.shot-opp.shot vector and the
# opp.shot-opp.ballmark vector (player's previous hit position -> opponent's
# hit position -> opponent's ballmark).
fed_df$o.angle <- fold_shot_angle(
  fed_df$p.start.x, fed_df$p.start.y,
  fed_df$oppo.hit.x, fed_df$oppo.hit.y,
  fed_df$oppo.ballmark.x, fed_df$oppo.ballmark.y
)

# p.angle: the angle the player hits (opponent's hit position -> player's hit
# position -> player's ballmark).
fed_df$p.angle <- fold_shot_angle(
  fed_df$oppo.hit.x, fed_df$oppo.hit.y,
  fed_df$start.x, fed_df$start.y,
  fed_df$ballmark.x, fed_df$ballmark.y
)
# p.angle and speed.ratio of the same player's previous shot in the rally
# (two rows back). Defaults to 0 when there is no such shot: the first two
# rows, shots before the 4th of a point, or a different hitter two rows back.
fed_df <- fed_df %>%
  mutate(
    lag.p.angle = 0,
    lag.speed.ratio = 0
  )

# Rows 3..n; empty when the data has fewer than three rows.
lag_rows <- seq_len(nrow(fed_df))[-(1:2)]
has_prior_shot <-
  fed_df$impact.player[lag_rows] == fed_df$impact.player[lag_rows - 2] &
  fed_df$shot[lag_rows] >= 4

fed_df$lag.p.angle[lag_rows] <-
  ifelse(has_prior_shot, fed_df$p.angle[lag_rows - 2], 0)
fed_df$lag.speed.ratio[lag_rows] <-
  ifelse(has_prior_shot, fed_df$speed.ratio[lag_rows - 2], 0)
lastshot - last shot in the point
fedhit - federer hits the shot
isserver - if server hits the shot
winner - if shot is winner (i.e. point doesn’t end in error, as opponent doesn’t reach the ball)
retser - if shot is returning serve
retser1 - if shot is returning first serve
retser2 - if shot is returning second serve
speed.diff - difference in current shot speed vs opponents last shot speed (at impact)
speed.ratio - shot speed/opponent’s previous shot speed
speed1 - speed of shot from impact with racquet
o.angle - angle made by opponents previous shot
p.angle - angle made by player’s current shot
lag.p.angle - angle made by player’s previous shot in rally
lag.speed.ratio - speed ratio of player’s previous shot in rally
oppo.hit - coordinates of opponents previous impact
fed.points/games/sets - fed score up to current point
opp.points/games/sets - opponent score up to current point
points/games/sets.diff - difference in score from Federer’s perspective
# Keep only the shots hit by Federer (opponents removed).
fed_no.opp <- dplyr::filter(fed_df, impact.player == "FEDERER")

# Federer's rally shots: drop serves (hitpoint "S") and the serveid column,
# then keep only shots whose geometry is consistent with the reorientation,
# so that we only look at shots hit onto the other side:
#   - the opponent hit the previous shot from the positive-x side,
#   - that shot landed on Federer's (negative-x) side,
#   - Federer's shot landed on the opponent's (positive-x) side.
# To check: ggplot(fed_only, aes(ballmark.x, ballmark.y)) + geom_point()
fed_only <- fed_no.opp %>%
  dplyr::filter(hitpoint != "S") %>%
  dplyr::select(-serveid) %>%
  dplyr::filter(
    oppo.hit.x >= 0,
    oppo.ballmark.x <= 0,
    ballmark.x >= 0
  )
# Cumulative shot number within each match (functions as time).
# The counter restarts at 1 whenever matchid differs from the row above, so
# it is computed from the run lengths of matchid rather than a row-by-row loop
# (which also misbehaved for data with fewer than two rows).
match_runs <- rle(as.character(fed_only$matchid))
fed_only$count <- as.numeric(sequence(match_runs$lengths))

# Adjusting to remove NAs and replace with 0s for modelling
fed_only$o.angle[is.na(fed_only$o.angle)] <- 0
After a visual analysis of interaction between variables in the dataset, the response variables and covariates must be selected for the model.
Response variables are what we see occur as a result of Federer’s hidden state. The primary candidates for the models are: winner, speed.ratio and p.angle.
Covariates are factors that occur up to the time of the shot, and affect the transition probabilities from/to each hidden state. The covariates selected for modelling after visual examination are: oppo.speed, retser1, retser2, start.x, o.angle, oppo.hit.x, lag.p.angle, lag.speed.ratio, points.diff, games.diff, sets.diff
Models are broken down into three types. The first are simple models, independent of match score, the second are more detailed models, independent of match score and the third are detailed models, dependent on match score.
The two best models by comparing AIC values were Type 2, Model 1 and Type 3, Model 4. The latter is just the former plus covariates for the difference in points, games and sets.
Because the difference in AIC between these models is so small, and the -logLik is lower for Type 3 Model 4, I have decided to include points, games and set difference in the model for the sake of examining other players.
Part of the rationale for this decision is that other players may behave differently with different score differentials, even if Federer may be able to block out the score in a match and play consistently throughout. For the sake of modelling other players in the future, the score dependent model (T3M4) is selected, even though it is not a significant improvement on the score independent model (T2M1).
Overall: The decision is to proceed with Model 4 from Type 3 category.
Response variables: winner, speed.ratio
Covariates: oppo.speed, retser1, retser2, start.x, o.angle, oppo.hit.x, lag.p.angle, lag.speed.ratio, points.diff, games.diff, sets.diff
library(depmixS4)
## Loading required package: nnet
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: Rsolnp
# Selected model (Type 3, Model 4): two hidden states, two responses
# (winner as a multinomial, speed.ratio as a gaussian), with the transition
# probabilities depending on shot covariates and the score differentials.
# NOTE(review): no ntimes argument is given, so all rows of fed_only are
# treated as a single sequence - confirm this is intended across matches.
#
# fit() uses randomly generated starting values for EM; fixing the seed makes
# the fit, and which state ends up labelled 1 or 2, reproducible.
set.seed(2017)
t3.mod4 <- depmix(
  list(winner ~ 1, speed.ratio ~ 1),
  transition = ~ oppo.speed + retser1 + retser2 + start.x + o.angle +
    oppo.hit.x + lag.p.angle + lag.speed.ratio + points.diff + games.diff +
    sets.diff,
  data = fed_only,
  nstates = 2,
  family = list(multinomial("identity"), gaussian())
)
t3.fm4 <- fit(t3.mod4)
## iteration 0 logLik: -1724.967
## iteration 5 logLik: -1540.558
## iteration 10 logLik: -1499.004
## iteration 15 logLik: -1492.742
## iteration 20 logLik: -1489.153
## iteration 25 logLik: -1487.221
## iteration 30 logLik: -1486.235
## iteration 35 logLik: -1485.718
## iteration 40 logLik: -1485.432
## iteration 45 logLik: -1485.255
## iteration 50 logLik: -1485.133
## iteration 55 logLik: -1485.043
## iteration 60 logLik: -1484.973
## iteration 65 logLik: -1484.918
## iteration 70 logLik: -1484.877
## iteration 75 logLik: -1484.847
## iteration 80 logLik: -1484.829
## iteration 85 logLik: -1484.817
## iteration 90 logLik: -1484.81
## iteration 95 logLik: -1484.807
## iteration 100 logLik: -1484.805
## iteration 105 logLik: -1484.804
## iteration 110 logLik: -1484.804
## iteration 115 logLik: -1484.803
## converged at iteration 116 with logLik: -1484.803
# Print the fitted initial-state, transition and response parameters.
summary(t3.fm4)
## Initial state probabilties model
## pr1 pr2
## 0 1
##
## Transition model for state (component) 1
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio + points.diff + games.diff +
## sets.diff
## Coefficients:
## St1 St2
## (Intercept) 0 1.47554857
## oppo.speed 0 -0.06924244
## retser1 0 3.66921969
## retser2 0 10.59562384
## start.x 0 -0.17673631
## o.angle 0 0.04367224
## oppo.hit.x 0 -0.13092574
## lag.p.angle 0 0.30795910
## lag.speed.ratio 0 -0.13155692
## points.diff 0 -0.95286190
## games.diff 0 -0.10021897
## sets.diff 0 -0.04436444
## Probalities at zero values of the covariates.
## 0.1861007 0.8138993
##
## Transition model for state (component) 2
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio + points.diff + games.diff +
## sets.diff
## Coefficients:
## St1 St2
## (Intercept) 0 -0.96857438
## oppo.speed 0 -0.01982070
## retser1 0 -0.03640242
## retser2 0 1.66544319
## start.x 0 -0.12155078
## o.angle 0 0.01197452
## oppo.hit.x 0 0.12681329
## lag.p.angle 0 -0.10240236
## lag.speed.ratio 0 0.98343017
## points.diff 0 0.19377559
## games.diff 0 -0.03281816
## sets.diff 0 -0.41287784
## Probalities at zero values of the covariates.
## 0.7248353 0.2751647
##
##
## Response parameters
## Resp 1 : multinomial
## Resp 2 : gaussian
## Re1.0 Re1.1 Re2.(Intercept) Re2.sd
## St1 0.710 0.290 1.326 0.521
## St2 0.956 0.044 0.819 0.275
The response parameters tell us about the characteristics of each state. What is found is that when two states are specified, they meet the criteria of ‘winner’ and ‘return’ as we expect.
State 2 is a ‘returning’ state, where the probability of hitting a winner is 0.044, and the speed.ratio of shots is 0.819, on average. This indicates slower shots with a more conservative approach.
State 1 is a ‘winning’ or ‘aggressive’ state, where the probability of hitting a winner is 0.29, and the speed.ratio of shots is 1.326, on average.
Not only is this confirming our qualitative intuition, but it is telling us about how Federer behaves in matches, and shows that there is a clear difference between his shot intentions.
A limitation of this analysis is that it is examining only two states. Further analysis would consider more states to help classification. For example, if four states were used, there may be two returning states, one faster than the other, and two aggressive states, with one very fast, and one very slow (i.e. drop shots).
#Pull out parameters
# All fitted parameters of t3.fm4; the code that follows addresses them by
# position, so it is tied to this exact model specification.
pars <- getpars(t3.fm4)
# Build the 2 x 2 transition matrix for one shot from its covariate values.
#
# Reads the global parameter vector `pars`. For transitions out of state 1 the
# linear predictor uses pars[[4]] as intercept and pars[[6]], pars[[8]], ...,
# pars[[26]] as coefficients; out of state 2 it uses pars[[28]] and
# pars[[30]], ..., pars[[50]].
#
# Inputs (in coefficient order):
#   a - oppo.speed        g - lag.p.angle
#   b - retser1           h - lag.speed.ratio
#   c - retser2           j - points.diff
#   d - start.x           k - games.diff
#   e - o.angle           l - sets.diff
#   f - oppo.hit.x
#
# Returns a matrix whose row r holds the probabilities of moving from state r
# to state 1 (column 1) and to state 2 (column 2); each row sums to 1.
tr.refresh <- function(a, b, c, d, e, f, g, h, j, k, l) {
  covariates <- list(a, b, c, d, e, f, g, h, j, k, l)

  # Probability of moving to state 1, given the position in `pars` of the
  # intercept for the origin state. Coefficients sit at every second position
  # after the intercept.
  prob_to_state1 <- function(intercept_pos) {
    eta <- pars[[intercept_pos]]
    for (idx in seq_along(covariates)) {
      eta <- eta + pars[[intercept_pos + 2 * idx]] * covariates[[idx]]
    }
    1 / (1 + exp(eta))
  }

  p11 <- prob_to_state1(4)
  p21 <- prob_to_state1(28)

  # Filled column by column: column 1 = to state 1, column 2 = to state 2.
  matrix(c(p11, p21, 1 - p11, 1 - p21), ncol = 2)
}
# Predicted states shell: one row per Federer shot, one column per state.
pr.state <- matrix(0, nrow(fed_only), 2)

# Initial state distribution as a 1 x 2 row vector.
init.state <- matrix(c(pars[[1]], pars[[2]]), ncol = 2) #initial state

# Covariates in the order tr.refresh() expects its arguments (a, b, ..., l).
cov.names <- c("oppo.speed", "retser1", "retser2", "start.x", "o.angle",
               "oppo.hit.x", "lag.p.angle", "lag.speed.ratio",
               "points.diff", "games.diff", "sets.diff")

# Propagate the state distribution shot by shot: row i is the previous
# distribution (the initial one for row 1) times the transition matrix built
# from shot i's covariates.
# NOTE(review): these probabilities use the covariates only, not the observed
# responses, and are carried across match boundaries - confirm this is
# intended rather than depmixS4's posterior().
state.prob <- init.state
for (i in seq_len(nrow(fed_only))) {
  cov.values <- lapply(cov.names, function(nm) fed_only[[nm]][i])
  trans <- do.call(tr.refresh, cov.values)
  state.prob <- state.prob %*% trans
  pr.state[i, ] <- state.prob
}
# Predicted states - also adjusts to make sure it selects the correct state.
# pars[[52]] and pars[[56]] are compared to decide which state is the
# 'winning' one; presumably they are the state-1 and state-2 probabilities of
# winner == 1 - TODO confirm against getpars() ordering.
# prob.win is the probability of state 1 when pars[[52]] is at least
# pars[[56]], and one minus it otherwise.
win.is.state1 <- pars[[52]] >= pars[[56]]
predict_df <- data.frame(
  prob.win = if (win.is.state1) pr.state[, 1] else 1 - pr.state[, 1]
)

# Making conclusions on state: classify as winning (1) when prob.win > 0.5
predict_df <- predict_df %>%
  rowwise %>%
  mutate(pred.win = ifelse(prob.win <= 0.5, 0, 1))

# Attach the predictions to the shot data, row for row.
fed_pred <- cbind(fed_only, predict_df)
#Remember that state 1 is winner, state 2 is returning
Now plotting the probabilities for each shot, for every game
# Probability of the aggressive/winning state over time, one panel per
# opponent: raw series in blue, loess smooth in red.
# `scales` is spelled out in full (it was `scale`, which only worked through
# partial argument matching).
ggplot(fed_pred, aes(x = count, y = prob.win)) +
  geom_line(color = "blue", size = 0.3) +
  geom_smooth(span = 0.1, se = FALSE, color = "red", size = 0.4) +
  facet_wrap(~factor(opponent), scales = "free") +
  xlab("Time") +
  ylab("Probability of Being in Aggressive/Winning State") +
  ggtitle("Predicting Federer's Shot Intentions Across Matches in the 2017 Australian Open")
## `geom_smooth()` using method = 'loess'
This plot shows the probability of being in a ‘winning’ or ‘aggressive’ state throughout each of Federer’s matches. The red lines are fitted lines that give us a lower resolution picture of Federer’s behaviour throughout the games.
Further visualization involving faceting with other variables may give more insight into Federer’s behavior. There was difficulty faceting by opponent and by other variables.
For the sake of more visualization, let’s consider Federer’s game against Berdych. This match is chosen because the probability of being in aggressive state increases as the game goes on (i.e. upwards trend).
#Berdych match
# Keep the shots from the match against Berdych and stack the predicted
# probability and selected covariates into long form (one row per shot per
# variable) so that each variable gets its own panel.
fed_berd <- fed_pred %>%
  filter(opponent == "BERDYCH") %>%
  gather(
    key = vars, value = measurement,
    prob.win, oppo.speed, start.x, o.angle, oppo.hit.x,
    points.diff, games.diff, sets.diff
  )

# One row of panels per variable, each with its own y axis.
# `scales` is spelled out in full (it was `scale`, which only worked through
# partial argument matching).
ggplot(fed_berd, aes(x = count, y = measurement)) +
  geom_line(color = "blue") +
  facet_grid(vars ~ ., scales = "free_y") +
  xlab("Time") +
  ggtitle("Federer vs. Berdych: Looking at how variables move together over the match")
This contains all models run in determining best fit, and the visual plots.
Here are a series of plots which give us an idea of how each variable affects hitting a winner.
Looking at these plots, we try to determine which variables may be used as response variables, meaning that they make up part of the state classification, or as a covariate, meaning they affect the likelihood of being in a state.
##Plots of shot and ballmark co-ordinates, angles made by shots and speed measures
library(ggplot2)

# Smoothed proportion of winners against the x coordinate of Federer's shot
ggplot(fed_only, aes(x = start.x, y = winner)) +
  geom_smooth() +
  xlab("X Coordinate of Shot") +
  ylab("Proportion of Winners") +
  ggtitle("Winners By X Co-Ordinate")
## `geom_smooth()` using method = 'gam'

# Hexbin count of winners by where the opponent hit the previous shot
library(hexbin)
ggplot(fed_only, aes(x = oppo.hit.x, y = oppo.hit.y, z = winner)) +
  stat_summary_hex(fun = sum) +
  xlab("X Coordinate of Oppo Shot") +
  ylab("Y Coordinate of Oppo Shot") +
  ggtitle("Count of Winners By Opponent's Shot Co-Ordinates")
### Get help transforming this to proportion in each bin instead of raw count

# Hexbin count of winners by where Federer hit the shot (x-y plane)
ggplot(fed_only, aes(x = start.x, y = start.y, z = winner)) +
  stat_summary_hex(fun = sum) +
  xlab("X Coordinate") +
  ylab("Y Coordinate") +
  ggtitle("Winners By Shot Co-Ordinates")

# Hexbin count of winners by where Federer's shot landed (x-y plane)
ggplot(fed_only, aes(x = ballmark.x, y = ballmark.y, z = winner)) +
  stat_summary_hex(fun = sum) +
  xlab("X Coordinate") +
  ylab("Y Coordinate") +
  ggtitle("Winners By Shot Ballmark Co-Ordinates")

# Smoothed proportion of winners against the two shot angles
ggplot(fed_only, aes(x = o.angle, y = winner)) +
  geom_smooth() +
  xlab("Angle Made By Opponents Shot With Fed's Previous Shot") +
  ylab("Proportion of Winners") +
  ggtitle("Winners By Opponent Shot Angle")
## `geom_smooth()` using method = 'gam'
ggplot(fed_only, aes(x = p.angle, y = winner)) +
  geom_smooth() +
  xlab("Angle Made By Federer's Shot") +
  ylab("Proportion of Winners") +
  ggtitle("Winners By Federer's Shot Angle")
## `geom_smooth()` using method = 'gam'

# Angle densities, one curve per value of winner
ggplot(fed_only) +
  geom_density(aes(p.angle, group = factor(winner), color = factor(winner))) +
  ggtitle("Density of Winners by Fed Shot Angles")
ggplot(fed_only) +
  geom_density(aes(o.angle, group = factor(winner), color = factor(winner))) +
  ggtitle("Density of Winners by Opponent Shot Angles")

# Opponent's shot speed: density and boxplot split by winner
ggplot(fed_only) +
  geom_density(aes(oppo.speed, group = factor(winner), color = factor(winner)))
ggplot(fed_only, aes(x = factor(winner), y = oppo.speed, fill = factor(winner))) +
  geom_boxplot() +
  ggtitle("Winner by Opponent Speed")

# Speed ratio: density and boxplot split by winner
ggplot(fed_only) +
  geom_density(aes(speed.ratio, group = factor(winner), color = factor(winner)))
ggplot(fed_only, aes(x = factor(winner), y = speed.ratio, fill = factor(winner))) +
  geom_boxplot() +
  ggtitle("Winner by Speed Ratio")
# Box plots across multiple categories: speed1, final.shot and shot stacked
# into long form so each gets its own panel, split by winner.
# `scales` is spelled out in full (it was `scale`, which only worked through
# partial argument matching).
df_long1 <- gather(
  data = fed_only, key = variables, value = stat,
  speed1, final.shot, shot
)
ggplot(df_long1) +
  geom_boxplot(aes(x = factor(winner), y = stat, fill = factor(winner))) +
  facet_wrap(~variables, scales = "free_y") +
  ggtitle("Differences In Characteristics of Winner/Not")

# Shared base plots: shot speed and speed ratio by winner. Each plot below
# only adds the score-difference facet and its title.
speed_by_winner <-
  ggplot(fed_only, aes(x = factor(winner), y = speed1, fill = factor(winner))) +
  geom_boxplot()
ratio_by_winner <-
  ggplot(fed_only, aes(x = factor(winner), y = speed.ratio, fill = factor(winner))) +
  geom_boxplot()

# Boxplots for speed by winner, facet by sets difference
speed_by_winner +
  facet_grid(. ~ sets.diff) +
  ggtitle("Speed of Shots For Difference in Sets Won")
# Boxplots for speed by winner, facet by games difference
speed_by_winner +
  facet_grid(. ~ games.diff) +
  ggtitle("Speed of Shots For Difference in Games Won")
# Boxplots for speed by winner, facet by points difference
speed_by_winner +
  facet_grid(. ~ points.diff) +
  ggtitle("Speed of Shots For Difference in Points Won")

# Boxplots for speed.ratio by winner, facet by sets difference
ratio_by_winner +
  facet_grid(. ~ sets.diff) +
  ggtitle("Difference in Speed Ratio For Difference in Sets Won")
# Boxplots for speed.ratio by winner, facet by games difference
ratio_by_winner +
  facet_grid(. ~ games.diff) +
  ggtitle("Difference in Speed Ratio For Difference in Games Won")
# Boxplots for speed.ratio by winner, facet by points difference
ratio_by_winner +
  facet_grid(. ~ points.diff) +
  ggtitle("Difference in Speed Ratio For Difference in Points Won")
# Proportion of winners by forehand/backhand.
# Rows with an NA in any column are dropped first; the proportion is taken
# within each hitpoint value.
# NOTE(review): na.omit() looks at every column, so it may discard many
# shots - confirm that is intended.
fed_hand <- na.omit(fed_only) %>%
  group_by(hitpoint, winner) %>%
  summarise(n = n()) %>%
  mutate(proportion = n / sum(n))
ggplot(fed_hand, aes(x = factor(winner), y = proportion, color = factor(winner))) +
  geom_point() +
  facet_grid(~hitpoint, labeller = label_both) +
  ggtitle("Proportion of Winners for Backhand/Forehand Shots")

# Proportion of winners for returning serve; the proportion is taken within
# each retser / retser1 combination.
fed_ret <- fed_only %>%
  group_by(retser, retser1, winner) %>%
  summarise(n = n()) %>%
  mutate(proportion = n / sum(n))
## Warning: Grouping rowwise data frame strips rowwise nature
ggplot(fed_ret, aes(x = factor(winner), y = proportion, color = factor(winner))) +
  geom_point() +
  facet_wrap(~retser ~retser1, labeller = label_both) +
  ggtitle("Proportion of Winners for Returning Serve - For Both Serve 1 and 2")

# Importance: smoothed proportion of winners, then density split by winner
ggplot(fed_only, aes(x = importance, y = winner)) +
  geom_smooth() +
  xlab("Importance") +
  ylab("Proportion of Winners") +
  labs(title = "Proportion of Winners against Importance")
## `geom_smooth()` using method = 'gam'
ggplot(fed_only) +
  geom_density(aes(importance, group = factor(winner), color = factor(winner))) +
  labs(title = "Density of Importance by Winners")
Response: winner Covariates: oppo.speed, retser1, retser2, start.x
library(depmixS4)
library(dplyr)
# Type 1, Model 1: winner as the only response; transitions depend on
# oppo.speed, retser1, retser2 and start.x.
# The seed is fixed because EM starts from random values. The iteration cap is
# raised because the logged run was still improving the log-likelihood at
# iteration 500 (the default cap) and never reported convergence.
set.seed(2017)
t1.mod1 <- depmix(
  winner ~ 1,
  transition = ~ oppo.speed + retser1 + retser2 + start.x,
  data = fed_only,
  nstates = 2,
  family = multinomial("identity")
)
t1.fm1 <- fit(t1.mod1, emcontrol = em.control(maxit = 2000))
## iteration 0 logLik: -663.558
## iteration 5 logLik: -663.5557
## iteration 10 logLik: -663.5507
## iteration 15 logLik: -663.5381
## iteration 20 logLik: -663.5054
## iteration 25 logLik: -663.421
## iteration 30 logLik: -663.2069
## iteration 35 logLik: -662.6893
## iteration 40 logLik: -661.5691
## iteration 45 logLik: -659.6325
## iteration 50 logLik: -657.2954
## iteration 55 logLik: -655.4469
## iteration 60 logLik: -654.4127
## iteration 65 logLik: -653.9212
## iteration 70 logLik: -653.6773
## iteration 75 logLik: -653.5343
## iteration 80 logLik: -653.4368
## iteration 85 logLik: -653.3645
## iteration 90 logLik: -653.3085
## iteration 95 logLik: -653.2645
## iteration 100 logLik: -653.2291
## iteration 105 logLik: -653.2001
## iteration 110 logLik: -653.1757
## iteration 115 logLik: -653.1547
## iteration 120 logLik: -653.1361
## iteration 125 logLik: -653.119
## iteration 130 logLik: -653.1029
## iteration 135 logLik: -653.0872
## iteration 140 logLik: -653.072
## iteration 145 logLik: -653.0568
## iteration 150 logLik: -653.0414
## iteration 155 logLik: -653.0257
## iteration 160 logLik: -653.0095
## iteration 165 logLik: -652.9928
## iteration 170 logLik: -652.9756
## iteration 175 logLik: -652.9574
## iteration 180 logLik: -652.9383
## iteration 185 logLik: -652.9184
## iteration 190 logLik: -652.8977
## iteration 195 logLik: -652.8759
## iteration 200 logLik: -652.853
## iteration 205 logLik: -652.8291
## iteration 210 logLik: -652.8041
## iteration 215 logLik: -652.778
## iteration 220 logLik: -652.7507
## iteration 225 logLik: -652.7224
## iteration 230 logLik: -652.6929
## iteration 235 logLik: -652.6624
## iteration 240 logLik: -652.6308
## iteration 245 logLik: -652.5975
## iteration 250 logLik: -652.5631
## iteration 255 logLik: -652.5276
## iteration 260 logLik: -652.4911
## iteration 265 logLik: -652.4537
## iteration 270 logLik: -652.4155
## iteration 275 logLik: -652.3768
## iteration 280 logLik: -652.3375
## iteration 285 logLik: -652.2981
## iteration 290 logLik: -652.2589
## iteration 295 logLik: -652.22
## iteration 300 logLik: -652.1816
## iteration 305 logLik: -652.1442
## iteration 310 logLik: -652.1076
## iteration 315 logLik: -652.0724
## iteration 320 logLik: -652.0386
## iteration 325 logLik: -652.0063
## iteration 330 logLik: -651.9755
## iteration 335 logLik: -651.9464
## iteration 340 logLik: -651.9189
## iteration 345 logLik: -651.8932
## iteration 350 logLik: -651.8691
## iteration 355 logLik: -651.8467
## iteration 360 logLik: -651.8258
## iteration 365 logLik: -651.8065
## iteration 370 logLik: -651.7887
## iteration 375 logLik: -651.7724
## iteration 380 logLik: -651.7573
## iteration 385 logLik: -651.7481
## iteration 390 logLik: -651.7366
## iteration 395 logLik: -651.7246
## iteration 400 logLik: -651.7136
## iteration 405 logLik: -651.7036
## iteration 410 logLik: -651.6945
## iteration 415 logLik: -651.6861
## iteration 420 logLik: -651.6808
## iteration 425 logLik: -651.6769
## iteration 430 logLik: -651.6707
## iteration 435 logLik: -651.6643
## iteration 440 logLik: -651.6585
## iteration 445 logLik: -651.6538
## iteration 450 logLik: -651.6494
## iteration 455 logLik: -651.6475
## iteration 460 logLik: -651.6458
## iteration 465 logLik: -651.6441
## iteration 470 logLik: -651.6427
## iteration 475 logLik: -651.6413
## iteration 480 logLik: -651.6399
## iteration 485 logLik: -651.6387
## iteration 490 logLik: -651.6375
## iteration 495 logLik: -651.6363
## iteration 500 logLik: -651.6351
# Print the fitted initial-state, transition and response parameters.
summary(t1.fm1)
## Initial state probabilties model
## pr1 pr2
## 1 0
##
## Transition model for state (component) 1
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x
## Coefficients:
## St1 St2
## (Intercept) 0 2.41594376
## oppo.speed 0 0.03812658
## retser1 0 0.22733735
## retser2 0 -1.89074910
## start.x 0 0.25462285
## Probalities at zero values of the covariates.
## 0.08196496 0.918035
##
## Transition model for state (component) 2
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x
## Coefficients:
## St1 St2
## (Intercept) 0 7.8662030
## oppo.speed 0 0.1319348
## retser1 0 0.5780293
## retser2 0 -8.6265028
## start.x 0 1.1827889
## Probalities at zero values of the covariates.
## 0.0003833407 0.9996167
##
##
## Response parameters
## Resp 1 : multinomial
## Re1.0 Re1.1
## St1 0.957 0.043
## St2 0.827 0.173
State 1 is returning, as there is a 0.043 probability of hitting a winner. State 2 is more aggressive, with a 0.173 chance of hitting a winner.
Response: winner Covariates: oppo.speed, retser1, retser2, start.x, o.angle
library(depmixS4)
# Type 1, Model 2: as Model 1, with o.angle added to the transition
# covariates.
# The seed is fixed because EM starts from random values. The iteration cap is
# raised because the logged run was still improving the log-likelihood at
# iteration 500 (the default cap) and never reported convergence.
set.seed(2017)
t1.mod2 <- depmix(
  winner ~ 1,
  transition = ~ oppo.speed + retser1 + retser2 + start.x + o.angle,
  data = fed_only,
  nstates = 2,
  family = multinomial("identity")
)
t1.fm2 <- fit(t1.mod2, emcontrol = em.control(maxit = 2000))
## iteration 0 logLik: -663.554
## iteration 5 logLik: -663.5266
## iteration 10 logLik: -663.4658
## iteration 15 logLik: -663.3078
## iteration 20 logLik: -662.9013
## iteration 25 logLik: -661.9367
## iteration 30 logLik: -660.0377
## iteration 35 logLik: -657.3688
## iteration 40 logLik: -654.963
## iteration 45 logLik: -653.4983
## iteration 50 logLik: -652.7485
## iteration 55 logLik: -652.3335
## iteration 60 logLik: -652.0611
## iteration 65 logLik: -651.8616
## iteration 70 logLik: -651.7096
## iteration 75 logLik: -651.5922
## iteration 80 logLik: -651.5011
## iteration 85 logLik: -651.4294
## iteration 90 logLik: -651.372
## iteration 95 logLik: -651.3246
## iteration 100 logLik: -651.2838
## iteration 105 logLik: -651.2472
## iteration 110 logLik: -651.2131
## iteration 115 logLik: -651.18
## iteration 120 logLik: -651.1468
## iteration 125 logLik: -651.1131
## iteration 130 logLik: -651.0783
## iteration 135 logLik: -651.042
## iteration 140 logLik: -651.004
## iteration 145 logLik: -650.9641
## iteration 150 logLik: -650.922
## iteration 155 logLik: -650.8776
## iteration 160 logLik: -650.8309
## iteration 165 logLik: -650.782
## iteration 170 logLik: -650.7309
## iteration 175 logLik: -650.6776
## iteration 180 logLik: -650.6224
## iteration 185 logLik: -650.5655
## iteration 190 logLik: -650.507
## iteration 195 logLik: -650.4472
## iteration 200 logLik: -650.3864
## iteration 205 logLik: -650.3248
## iteration 210 logLik: -650.2627
## iteration 215 logLik: -650.2003
## iteration 220 logLik: -650.1377
## iteration 225 logLik: -650.0751
## iteration 230 logLik: -650.0127
## iteration 235 logLik: -649.9505
## iteration 240 logLik: -649.8884
## iteration 245 logLik: -649.8333
## iteration 250 logLik: -649.7715
## iteration 255 logLik: -649.7092
## iteration 260 logLik: -649.6468
## iteration 265 logLik: -649.5837
## iteration 270 logLik: -649.5194
## iteration 275 logLik: -649.4534
## iteration 280 logLik: -649.3852
## iteration 285 logLik: -649.3142
## iteration 290 logLik: -649.2395
## iteration 295 logLik: -649.1604
## iteration 300 logLik: -649.0761
## iteration 305 logLik: -648.9858
## iteration 310 logLik: -648.889
## iteration 315 logLik: -648.7856
## iteration 320 logLik: -648.6755
## iteration 325 logLik: -648.5599
## iteration 330 logLik: -648.4392
## iteration 335 logLik: -648.3153
## iteration 340 logLik: -648.1896
## iteration 345 logLik: -648.0644
## iteration 350 logLik: -647.9411
## iteration 355 logLik: -647.8216
## iteration 360 logLik: -647.707
## iteration 365 logLik: -647.5989
## iteration 370 logLik: -647.4974
## iteration 375 logLik: -647.4018
## iteration 380 logLik: -647.3117
## iteration 385 logLik: -647.2276
## iteration 390 logLik: -647.1489
## iteration 395 logLik: -647.0749
## iteration 400 logLik: -647.0045
## iteration 405 logLik: -646.9375
## iteration 410 logLik: -646.8724
## iteration 415 logLik: -646.8082
## iteration 420 logLik: -646.7464
## iteration 425 logLik: -646.6844
## iteration 430 logLik: -646.6223
## iteration 435 logLik: -646.559
## iteration 440 logLik: -646.4933
## iteration 445 logLik: -646.425
## iteration 450 logLik: -646.3531
## iteration 455 logLik: -646.2777
## iteration 460 logLik: -646.1981
## iteration 465 logLik: -646.1144
## iteration 470 logLik: -646.0431
## iteration 475 logLik: -645.9537
## iteration 480 logLik: -645.8704
## iteration 485 logLik: -645.7838
## iteration 490 logLik: -645.6983
## iteration 495 logLik: -645.6257
## iteration 500 logLik: -645.5686
# NOTE(review): the iteration log above ends at iteration 500 with no
# "converged" line, so these estimates may not be at the likelihood maximum.
# Consider re-fitting with a higher limit, e.g.
# fit(..., emcontrol = em.control(maxit = 2000)) -- TODO confirm.
summary(t1.fm2)
## Initial state probabilties model
## pr1 pr2
## 1 0
##
## Transition model for state (component) 1
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle
## Coefficients:
## St1 St2
## (Intercept) 0 -3.70995285
## oppo.speed 0 0.10832878
## retser1 0 0.82595757
## retser2 0 -1.59858713
## start.x 0 0.03081916
## o.angle 0 0.59980312
## Probalities at zero values of the covariates.
## 0.9761062 0.02389379
##
## Transition model for state (component) 2
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle
## Coefficients:
## St1 St2
## (Intercept) 0 39.88354484
## oppo.speed 0 0.02755278
## retser1 0 -2.19544123
## retser2 0 -17.54909238
## start.x 0 3.05868311
## o.angle 0 -1.70618938
## Probalities at zero values of the covariates.
## 4.773056e-18 1
##
##
## Response parameters
## Resp 1 : multinomial
## Re1.0 Re1.1
## St1 0.961 0.039
## St2 0.841 0.159
State 1 is the ‘return’ state, with only a 0.039 probability of hitting a winner. State 2 is more aggressive, with a 0.159 chance of hitting a winner.
Response: winner Covariates: oppo.speed, retser1, retser2, start.x, o.angle, oppo.hit.x
library(depmixS4)
# Model 3: two hidden states with `winner` as the only response; the
# transition covariates are those of model 2 plus oppo.hit.x.
t1.mod3 <- depmix(
  winner ~ 1,
  data = fed_only,
  nstates = 2,
  family = multinomial("identity"),
  transition = ~ oppo.speed + retser1 + retser2 + start.x + o.angle +
    oppo.hit.x
)
t1.fm3 <- fit(t1.mod3)
## iteration 0 logLik: -663.5497
## iteration 5 logLik: -663.4947
## iteration 10 logLik: -663.368
## iteration 15 logLik: -663.0324
## iteration 20 logLik: -662.1882
## iteration 25 logLik: -660.3898
## iteration 30 logLik: -657.6065
## iteration 35 logLik: -654.8257
## iteration 40 logLik: -652.9274
## iteration 45 logLik: -651.7885
## iteration 50 logLik: -651.0325
## iteration 55 logLik: -650.4706
## iteration 60 logLik: -650.0399
## iteration 65 logLik: -649.7141
## iteration 70 logLik: -649.4721
## iteration 75 logLik: -649.2929
## iteration 80 logLik: -649.1576
## iteration 85 logLik: -649.0504
## iteration 90 logLik: -648.9597
## iteration 95 logLik: -648.8769
## iteration 100 logLik: -648.7964
## iteration 105 logLik: -648.7145
## iteration 110 logLik: -648.629
## iteration 115 logLik: -648.5387
## iteration 120 logLik: -648.4433
## iteration 125 logLik: -648.3428
## iteration 130 logLik: -648.2375
## iteration 135 logLik: -648.1278
## iteration 140 logLik: -648.0136
## iteration 145 logLik: -647.8952
## iteration 150 logLik: -647.773
## iteration 155 logLik: -647.6453
## iteration 160 logLik: -647.5143
## iteration 165 logLik: -647.3757
## iteration 170 logLik: -647.2314
## iteration 175 logLik: -647.0823
## iteration 180 logLik: -646.9261
## iteration 185 logLik: -646.7646
## iteration 190 logLik: -646.5989
## iteration 195 logLik: -646.4556
## iteration 200 logLik: -646.2877
## iteration 205 logLik: -646.1231
## iteration 210 logLik: -645.9652
## iteration 215 logLik: -645.8174
## iteration 220 logLik: -645.6826
## iteration 225 logLik: -645.5624
## iteration 230 logLik: -645.4573
## iteration 235 logLik: -645.367
## iteration 240 logLik: -645.2903
## iteration 245 logLik: -645.2259
## iteration 250 logLik: -645.1721
## iteration 255 logLik: -645.1271
## iteration 260 logLik: -645.0896
## iteration 265 logLik: -645.0582
## iteration 270 logLik: -645.0316
## iteration 275 logLik: -645.009
## iteration 280 logLik: -644.9954
## iteration 285 logLik: -644.9847
## iteration 290 logLik: -644.9747
## iteration 295 logLik: -644.9654
## iteration 300 logLik: -644.9566
## iteration 305 logLik: -644.9483
## iteration 310 logLik: -644.9404
## iteration 315 logLik: -644.9329
## iteration 320 logLik: -644.9257
## iteration 325 logLik: -644.9188
## iteration 330 logLik: -644.9121
## iteration 335 logLik: -644.9056
## iteration 340 logLik: -644.8994
## iteration 345 logLik: -644.8932
## iteration 350 logLik: -644.8877
## iteration 355 logLik: -644.8818
## iteration 360 logLik: -644.8761
## iteration 365 logLik: -644.8709
## iteration 370 logLik: -644.8656
## iteration 375 logLik: -644.8603
## iteration 380 logLik: -644.8552
## iteration 385 logLik: -644.8502
## iteration 390 logLik: -644.8453
## iteration 395 logLik: -644.8405
## iteration 400 logLik: -644.8358
## iteration 405 logLik: -644.831
## iteration 410 logLik: -644.826
## iteration 415 logLik: -644.8211
## iteration 420 logLik: -644.8162
## iteration 425 logLik: -644.8114
## iteration 430 logLik: -644.8055
## iteration 435 logLik: -644.7998
## iteration 440 logLik: -644.7924
## iteration 445 logLik: -644.7862
## iteration 450 logLik: -644.779
## iteration 455 logLik: -644.7735
## iteration 460 logLik: -644.7681
## iteration 465 logLik: -644.7621
## iteration 470 logLik: -644.7548
## iteration 475 logLik: -644.7487
## iteration 480 logLik: -644.7427
## iteration 485 logLik: -644.7367
## iteration 490 logLik: -644.7301
## iteration 495 logLik: -644.724
## iteration 500 logLik: -644.7177
# NOTE(review): the iteration log above ends at iteration 500 with no
# "converged" line, so these estimates may not be at the likelihood maximum.
# State labels are arbitrary: in this fit St1 is the state with the higher
# winner probability (Re1.1 = 0.170 vs 0.034).
summary(t1.fm3)
## Initial state probabilties model
## pr1 pr2
## 0 1
##
## Transition model for state (component) 1
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x
## Coefficients:
## St1 St2
## (Intercept) 0 -36.2714661
## oppo.speed 0 -0.1175994
## retser1 0 2.9170394
## retser2 0 14.0571912
## start.x 0 -1.8310760
## o.angle 0 0.8333138
## oppo.hit.x 0 1.3707758
## Probalities at zero values of the covariates.
## 1 1.768082e-16
##
## Transition model for state (component) 2
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x
## Coefficients:
## St1 St2
## (Intercept) 0 2.52054514
## oppo.speed 0 -0.06345462
## retser1 0 1.09615610
## retser2 0 3.25015288
## start.x 0 -0.11651702
## o.angle 0 0.05871708
## oppo.hit.x 0 -0.28377314
## Probalities at zero values of the covariates.
## 0.07443038 0.9255696
##
##
## Response parameters
## Resp 1 : multinomial
## Re1.0 Re1.1
## St1 0.830 0.170
## St2 0.966 0.034
Response: winner Covariates: oppo.speed, retser1, retser2, start.x, o.angle, oppo.hit.x, oppo.ballmark.x
library(depmixS4)
# Model 4: same single response (`winner`) and two states; the transition
# formula additionally includes oppo.ballmark.x.
t1.mod4 <- depmix(
  winner ~ 1,
  data = fed_only,
  nstates = 2,
  family = multinomial("identity"),
  transition = ~ oppo.speed + retser1 + retser2 + start.x + o.angle +
    oppo.hit.x + oppo.ballmark.x
)
t1.fm4 <- fit(t1.mod4)
## iteration 0 logLik: -663.5563
## iteration 5 logLik: -663.5432
## iteration 10 logLik: -663.5115
## iteration 15 logLik: -663.4222
## iteration 20 logLik: -663.17
## iteration 25 logLik: -662.4937
## iteration 30 logLik: -660.9045
## iteration 35 logLik: -658.0768
## iteration 40 logLik: -654.7987
## iteration 45 logLik: -652.327
## iteration 50 logLik: -650.8201
## iteration 55 logLik: -649.8543
## iteration 60 logLik: -649.1605
## iteration 65 logLik: -648.6427
## iteration 70 logLik: -648.2637
## iteration 75 logLik: -647.9949
## iteration 80 logLik: -647.8083
## iteration 85 logLik: -647.6789
## iteration 90 logLik: -647.5874
## iteration 95 logLik: -647.5203
## iteration 100 logLik: -647.4687
## iteration 105 logLik: -647.4269
## iteration 110 logLik: -647.3911
## iteration 115 logLik: -647.3593
## iteration 120 logLik: -647.3298
## iteration 125 logLik: -647.3018
## iteration 130 logLik: -647.2746
## iteration 135 logLik: -647.248
## iteration 140 logLik: -647.2225
## iteration 145 logLik: -647.196
## iteration 150 logLik: -647.17
## iteration 155 logLik: -647.143
## iteration 160 logLik: -647.1169
## iteration 165 logLik: -647.089
## iteration 170 logLik: -647.0606
## iteration 175 logLik: -647.0315
## iteration 180 logLik: -647.0017
## iteration 185 logLik: -646.9713
## iteration 190 logLik: -646.94
## iteration 195 logLik: -646.908
## iteration 200 logLik: -646.8751
## iteration 205 logLik: -646.8412
## iteration 210 logLik: -646.8063
## iteration 215 logLik: -646.7708
## iteration 220 logLik: -646.7338
## iteration 225 logLik: -646.6959
## iteration 230 logLik: -646.6569
## iteration 235 logLik: -646.617
## iteration 240 logLik: -646.5758
## iteration 245 logLik: -646.5336
## iteration 250 logLik: -646.4902
## iteration 255 logLik: -646.4456
## iteration 260 logLik: -646.3998
## iteration 265 logLik: -646.3525
## iteration 270 logLik: -646.3037
## iteration 275 logLik: -646.2532
## iteration 280 logLik: -646.2007
## iteration 285 logLik: -646.1459
## iteration 290 logLik: -646.0886
## iteration 295 logLik: -646.0284
## iteration 300 logLik: -645.9649
## iteration 305 logLik: -645.8978
## iteration 310 logLik: -645.8269
## iteration 315 logLik: -645.7522
## iteration 320 logLik: -645.6736
## iteration 325 logLik: -645.5914
## iteration 330 logLik: -645.5063
## iteration 335 logLik: -645.419
## iteration 340 logLik: -645.3307
## iteration 345 logLik: -645.2425
## iteration 350 logLik: -645.1552
## iteration 355 logLik: -645.0701
## iteration 360 logLik: -644.9876
## iteration 365 logLik: -644.9207
## iteration 370 logLik: -644.8439
## iteration 375 logLik: -644.7702
## iteration 380 logLik: -644.6991
## iteration 385 logLik: -644.6306
## iteration 390 logLik: -644.5645
## iteration 395 logLik: -644.5004
## iteration 400 logLik: -644.4471
## iteration 405 logLik: -644.3854
## iteration 410 logLik: -644.3244
## iteration 415 logLik: -644.2645
## iteration 420 logLik: -644.2059
## iteration 425 logLik: -644.1482
## iteration 430 logLik: -644.0921
## iteration 435 logLik: -644.037
## iteration 440 logLik: -643.9844
## iteration 445 logLik: -643.9387
## iteration 450 logLik: -643.8911
## iteration 455 logLik: -643.8528
## iteration 460 logLik: -643.8134
## iteration 465 logLik: -643.7751
## iteration 470 logLik: -643.7403
## iteration 475 logLik: -643.7096
## iteration 480 logLik: -643.684
## iteration 485 logLik: -643.6596
## iteration 490 logLik: -643.6388
## iteration 495 logLik: -643.6266
## iteration 500 logLik: -643.6159
# NOTE(review): the iteration log above ends at iteration 500 with no
# "converged" line, so these estimates may not be at the likelihood maximum.
# In this fit St2 is the state with the higher winner probability
# (Re1.1 = 0.183 vs 0.037).
summary(t1.fm4)
## Initial state probabilties model
## pr1 pr2
## 1 0
##
## Transition model for state (component) 1
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## oppo.ballmark.x
## Coefficients:
## St1 St2
## (Intercept) 0 1.70935709
## oppo.speed 0 0.02807223
## retser1 0 1.00171687
## retser2 0 -1.24024275
## start.x 0 0.33587397
## o.angle 0 0.06549177
## oppo.hit.x 0 0.04689092
## oppo.ballmark.x 0 -0.08514934
## Probalities at zero values of the covariates.
## 0.1532471 0.8467529
##
## Transition model for state (component) 2
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## oppo.ballmark.x
## Coefficients:
## St1 St2
## (Intercept) 0 57.0242308
## oppo.speed 0 0.2850711
## retser1 0 -2.5424468
## retser2 0 -19.2851729
## start.x 0 3.4453947
## o.angle 0 -1.3845546
## oppo.hit.x 0 -2.7888904
## oppo.ballmark.x 0 -1.2832330
## Probalities at zero values of the covariates.
## 1.716687e-25 1
##
##
## Response parameters
## Resp 1 : multinomial
## Re1.0 Re1.1
## St1 0.963 0.037
## St2 0.817 0.183
Response: winner Covariates: oppo.speed, retser1, retser2, start.x, o.angle, oppo.hit.x, lag.p.angle, lag.speed.ratio
library(depmixS4)
# Model 5: two states, `winner` as response; the transition formula uses the
# model 3 covariates plus lag.p.angle and lag.speed.ratio.
t1.mod5 <- depmix(
  winner ~ 1,
  data = fed_only,
  nstates = 2,
  family = multinomial("identity"),
  transition = ~ oppo.speed + retser1 + retser2 + start.x + o.angle +
    oppo.hit.x + lag.p.angle + lag.speed.ratio
)
t1.fm5 <- fit(t1.mod5)
## iteration 0 logLik: -663.5508
## iteration 5 logLik: -663.5012
## iteration 10 logLik: -663.3808
## iteration 15 logLik: -663.0447
## iteration 20 logLik: -662.1538
## iteration 25 logLik: -660.158
## iteration 30 logLik: -656.9553
## iteration 35 logLik: -653.6837
## iteration 40 logLik: -651.327
## iteration 45 logLik: -649.7476
## iteration 50 logLik: -648.6254
## iteration 55 logLik: -647.7909
## iteration 60 logLik: -647.165
## iteration 65 logLik: -646.7055
## iteration 70 logLik: -646.3764
## iteration 75 logLik: -646.1438
## iteration 80 logLik: -645.9787
## iteration 85 logLik: -645.8586
## iteration 90 logLik: -645.7675
## iteration 95 logLik: -645.6946
## iteration 100 logLik: -645.6326
## iteration 105 logLik: -645.5772
## iteration 110 logLik: -645.528
## iteration 115 logLik: -645.4789
## iteration 120 logLik: -645.4315
## iteration 125 logLik: -645.3852
## iteration 130 logLik: -645.3396
## iteration 135 logLik: -645.2944
## iteration 140 logLik: -645.2493
## iteration 145 logLik: -645.2058
## iteration 150 logLik: -645.1597
## iteration 155 logLik: -645.1134
## iteration 160 logLik: -645.0659
## iteration 165 logLik: -645.0171
## iteration 170 logLik: -644.9664
## iteration 175 logLik: -644.9137
## iteration 180 logLik: -644.8584
## iteration 185 logLik: -644.8004
## iteration 190 logLik: -644.7392
## iteration 195 logLik: -644.6746
## iteration 200 logLik: -644.6065
## iteration 205 logLik: -644.5344
## iteration 210 logLik: -644.4582
## iteration 215 logLik: -644.3778
## iteration 220 logLik: -644.2931
## iteration 225 logLik: -644.204
## iteration 230 logLik: -644.1103
## iteration 235 logLik: -644.0119
## iteration 240 logLik: -643.9085
## iteration 245 logLik: -643.8064
## iteration 250 logLik: -643.6925
## iteration 255 logLik: -643.5788
## iteration 260 logLik: -643.4517
## iteration 265 logLik: -643.3162
## iteration 270 logLik: -643.1812
## iteration 275 logLik: -643.0365
## iteration 280 logLik: -642.8655
## iteration 285 logLik: -642.675
## iteration 290 logLik: -642.4592
## iteration 295 logLik: -642.2112
## iteration 300 logLik: -641.9237
## iteration 305 logLik: -641.5912
## iteration 310 logLik: -641.2151
## iteration 315 logLik: -640.8056
## iteration 320 logLik: -640.3804
## iteration 325 logLik: -639.9592
## iteration 330 logLik: -639.5594
## iteration 335 logLik: -639.1943
## iteration 340 logLik: -638.8723
## iteration 345 logLik: -638.5962
## iteration 350 logLik: -638.3642
## iteration 355 logLik: -638.171
## iteration 360 logLik: -638.0096
## iteration 365 logLik: -637.8727
## iteration 370 logLik: -637.7534
## iteration 375 logLik: -637.6458
## iteration 380 logLik: -637.5456
## iteration 385 logLik: -637.4498
## iteration 390 logLik: -637.3565
## iteration 395 logLik: -637.2653
## iteration 400 logLik: -637.176
## iteration 405 logLik: -637.0893
## iteration 410 logLik: -637.0059
## iteration 415 logLik: -636.9258
## iteration 420 logLik: -636.849
## iteration 425 logLik: -636.7752
## iteration 430 logLik: -636.7057
## iteration 435 logLik: -636.6356
## iteration 440 logLik: -636.5665
## iteration 445 logLik: -636.498
## iteration 450 logLik: -636.4293
## iteration 455 logLik: -636.3604
## iteration 460 logLik: -636.2922
## iteration 465 logLik: -636.2243
## iteration 470 logLik: -636.1577
## iteration 475 logLik: -636.0951
## iteration 480 logLik: -636.0376
## iteration 485 logLik: -635.9878
## iteration 490 logLik: -635.9452
## iteration 495 logLik: -635.9078
## iteration 500 logLik: -635.874
# NOTE(review): the iteration log above ends at iteration 500 with no
# "converged" line, so these estimates may not be at the likelihood maximum.
# In this fit St2 is the state with the higher winner probability
# (Re1.1 = 0.160 vs 0.031).
summary(t1.fm5)
## Initial state probabilties model
## pr1 pr2
## 1 0
##
## Transition model for state (component) 1
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio
## Coefficients:
## St1 St2
## (Intercept) 0 -9.0669807
## oppo.speed 0 -0.4959859
## retser1 0 12.5150619
## retser2 0 -0.4332389
## start.x 0 -1.2293899
## o.angle 0 0.6846242
## oppo.hit.x 0 1.2040900
## lag.p.angle 0 -0.1049277
## lag.speed.ratio 0 -5.9548854
## Probalities at zero values of the covariates.
## 0.9998846 0.0001154012
##
## Transition model for state (component) 2
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio
## Coefficients:
## St1 St2
## (Intercept) 0 35.785160
## oppo.speed 0 0.302332
## retser1 0 -9.711250
## retser2 0 -12.108599
## start.x 0 1.992285
## o.angle 0 -1.706349
## oppo.hit.x 0 -1.299362
## lag.p.angle 0 0.653950
## lag.speed.ratio 0 -5.562465
## Probalities at zero values of the covariates.
## 2.875427e-16 1
##
##
## Response parameters
## Resp 1 : multinomial
## Re1.0 Re1.1
## St1 0.969 0.031
## St2 0.840 0.160
# AIC of the five t1 fits, printed one at a time (lower is better).
AIC(t1.fm1)
## [1] 1329.27
AIC(t1.fm2)
## [1] 1321.137
AIC(t1.fm3)
## [1] 1323.435
AIC(t1.fm4)
## [1] 1325.232
AIC(t1.fm5)
## [1] 1313.748
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Likelihood-ratio test of t1.fm3 against t1.fm5; the latter adds
# lag.p.angle and lag.speed.ratio to the transition covariates.
lrtest(t1.fm3,t1.fm5)
## Likelihood ratio test
##
## Model 1: t1.fm3
## Model 2: t1.fm5
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 17 -644.72
## 2 21 -635.87 4 17.687 0.00142 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
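Conclusion: Model 5 has the lowest AIC (1313.748), ahead of Model 2 (1321.137) and Model 3 (1323.435). The LR test shows that Model 5 fits significantly better than Model 3, which is nested within it (p = 0.00142), so we will continue with Model 5’s covariates. Note that the fits for Models 2–5 stopped at iteration 500 without reporting convergence, so these comparisons should be treated as provisional.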
Response: winner, speed.ratio Covariates: oppo.speed, retser1, retser2, start.x, o.angle, oppo.hit.x, lag.p.angle, lag.speed.ratio
library(depmixS4)
# Two responses per shot: `winner` (multinomial) and `speed.ratio`
# (gaussian). The transition covariates are those of t1.mod5.
t2.mod1 <- depmix(
  list(winner ~ 1, speed.ratio ~ 1),
  data = fed_only,
  nstates = 2,
  family = list(multinomial("identity"), gaussian()),
  transition = ~ oppo.speed + retser1 + retser2 + start.x + o.angle +
    oppo.hit.x + lag.p.angle + lag.speed.ratio
)
t2.fm1 <- fit(t2.mod1)
## iteration 0 logLik: -1693.458
## iteration 5 logLik: -1520.534
## iteration 10 logLik: -1505.303
## iteration 15 logLik: -1501.029
## iteration 20 logLik: -1498.384
## iteration 25 logLik: -1496.731
## iteration 30 logLik: -1495.665
## iteration 35 logLik: -1494.809
## iteration 40 logLik: -1493.973
## iteration 45 logLik: -1493.429
## iteration 50 logLik: -1493.226
## iteration 55 logLik: -1493.16
## iteration 60 logLik: -1493.135
## iteration 65 logLik: -1493.125
## iteration 70 logLik: -1493.123
## iteration 75 logLik: -1493.122
## iteration 80 logLik: -1493.122
## converged at iteration 84 with logLik: -1493.122
# State labels are arbitrary: in this fit St2 is the state with the higher
# winner probability (Re1.1 = 0.303 vs 0.047) and the higher mean
# speed.ratio (1.363 vs 0.821).
summary(t2.fm1)
## Initial state probabilties model
## pr1 pr2
## 1 0
##
## Transition model for state (component) 1
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio
## Coefficients:
## St1 St2
## (Intercept) 0 1.35331354
## oppo.speed 0 0.01195563
## retser1 0 0.30956091
## retser2 0 -1.36971300
## start.x 0 0.10347816
## o.angle 0 -0.01583781
## oppo.hit.x 0 -0.14895203
## lag.p.angle 0 0.09179271
## lag.speed.ratio 0 -0.71595312
## Probalities at zero values of the covariates.
## 0.2053292 0.7946708
##
## Transition model for state (component) 2
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio
## Coefficients:
## St1 St2
## (Intercept) 0 -1.36887976
## oppo.speed 0 0.07690745
## retser1 0 -7.32971853
## retser2 0 -4.56032079
## start.x 0 0.19199454
## o.angle 0 0.01174521
## oppo.hit.x 0 0.12323298
## lag.p.angle 0 -0.47450636
## lag.speed.ratio 0 -0.02801743
## Probalities at zero values of the covariates.
## 0.7971991 0.2028009
##
##
## Response parameters
## Resp 1 : multinomial
## Resp 2 : gaussian
## Re1.0 Re1.1 Re2.(Intercept) Re2.sd
## St1 0.953 0.047 0.821 0.278
## St2 0.697 0.303 1.363 0.515
Response: winner, p.angle Covariates: oppo.speed, retser1, retser2, start.x, o.angle, oppo.hit.x, lag.p.angle, lag.speed.ratio
library(depmixS4)
# Two responses per shot: `winner` (multinomial) and `p.angle` (gaussian),
# with the same transition covariates as t2.mod1.
t2.mod2 <- depmix(
  list(winner ~ 1, p.angle ~ 1),
  data = fed_only,
  nstates = 2,
  family = list(multinomial("identity"), gaussian()),
  transition = ~ oppo.speed + retser1 + retser2 + start.x + o.angle +
    oppo.hit.x + lag.p.angle + lag.speed.ratio
)
t2.fm2 <- fit(t2.mod2)
## iteration 0 logLik: -7023.854
## iteration 5 logLik: -6755.053
## iteration 10 logLik: -6701.892
## iteration 15 logLik: -6695.553
## iteration 20 logLik: -6693.224
## iteration 25 logLik: -6692.318
## iteration 30 logLik: -6691.98
## iteration 35 logLik: -6691.856
## iteration 40 logLik: -6691.812
## iteration 45 logLik: -6691.794
## iteration 50 logLik: -6691.789
## iteration 55 logLik: -6691.788
## converged at iteration 60 with logLik: -6691.787
# State labels are arbitrary: in this fit St1 is the state with the higher
# winner probability (Re1.1 = 0.172 vs 0.045) and the larger mean p.angle
# (11.988 vs 3.485).
summary(t2.fm2)
## Initial state probabilties model
## pr1 pr2
## 1 0
##
## Transition model for state (component) 1
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio
## Coefficients:
## St1 St2
## (Intercept) 0 0.3044930703
## oppo.speed 0 -0.0242090562
## retser1 0 0.1155433602
## retser2 0 1.0581688714
## start.x 0 -0.0921656124
## o.angle 0 0.0029190695
## oppo.hit.x 0 -0.0729962676
## lag.p.angle 0 0.0007612852
## lag.speed.ratio 0 0.1823612674
## Probalities at zero values of the covariates.
## 0.4244595 0.5755405
##
## Transition model for state (component) 2
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio
## Coefficients:
## St1 St2
## (Intercept) 0 -2.04095433
## oppo.speed 0 -0.01598135
## retser1 0 0.55841034
## retser2 0 1.37961125
## start.x 0 -0.19973323
## o.angle 0 0.01894479
## oppo.hit.x 0 0.01276219
## lag.p.angle 0 -0.02428530
## lag.speed.ratio 0 0.17650515
## Probalities at zero values of the covariates.
## 0.8850304 0.1149696
##
##
## Response parameters
## Resp 1 : multinomial
## Resp 2 : gaussian
## Re1.0 Re1.1 Re2.(Intercept) Re2.sd
## St1 0.828 0.172 11.988 6.443
## St2 0.955 0.045 3.485 2.292
Response: winner, speed.ratio, p.angle Covariates: oppo.speed, retser1, retser2, start.x, o.angle, oppo.hit.x, lag.p.angle, lag.speed.ratio
library(depmixS4)
# Three responses per shot: `winner` (multinomial) plus `speed.ratio` and
# `p.angle` (both gaussian); transition covariates unchanged.
t2.mod3 <- depmix(
  list(winner ~ 1, speed.ratio ~ 1, p.angle ~ 1),
  data = fed_only,
  nstates = 2,
  family = list(multinomial("identity"), gaussian(), gaussian()),
  transition = ~ oppo.speed + retser1 + retser2 + start.x + o.angle +
    oppo.hit.x + lag.p.angle + lag.speed.ratio
)
t2.fm3 <- fit(t2.mod3)
## iteration 0 logLik: -8073.539
## iteration 5 logLik: -7589.665
## iteration 10 logLik: -7575.816
## iteration 15 logLik: -7573.141
## iteration 20 logLik: -7572.208
## iteration 25 logLik: -7571.876
## iteration 30 logLik: -7571.761
## iteration 35 logLik: -7571.722
## iteration 40 logLik: -7571.708
## iteration 45 logLik: -7571.703
## iteration 50 logLik: -7571.702
## converged at iteration 54 with logLik: -7571.701
# State labels are arbitrary: in this fit St2 is the state with the higher
# winner probability (Re1.1 = 0.196 vs 0.034).
summary(t2.fm3)
## Initial state probabilties model
## pr1 pr2
## 0 1
##
## Transition model for state (component) 1
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio
## Coefficients:
## St1 St2
## (Intercept) 0 1.87207996
## oppo.speed 0 0.01484114
## retser1 0 -0.29325673
## retser2 0 -1.18775749
## start.x 0 0.17671476
## o.angle 0 -0.02244905
## oppo.hit.x 0 -0.03175104
## lag.p.angle 0 0.03038348
## lag.speed.ratio 0 -0.32108701
## Probalities at zero values of the covariates.
## 0.1333012 0.8666988
##
## Transition model for state (component) 2
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio
## Coefficients:
## St1 St2
## (Intercept) 0 -0.434740787
## oppo.speed 0 0.022335600
## retser1 0 -0.599600915
## retser2 0 -1.650090532
## start.x 0 0.067928361
## o.angle 0 -0.010360609
## oppo.hit.x 0 0.072635922
## lag.p.angle 0 -0.005805131
## lag.speed.ratio 0 -0.291639094
## Probalities at zero values of the covariates.
## 0.6070052 0.3929948
##
##
## Response parameters
## Resp 1 : multinomial
## Resp 2 : gaussian
## Resp 3 : gaussian
## Re1.0 Re1.1 Re2.(Intercept) Re2.sd Re3.(Intercept) Re3.sd
## St1 0.966 0.034 0.794 0.262 4.00 2.702
## St2 0.804 0.196 1.135 0.489 12.07 6.801
Comparing these three to see which combination of response variables is the best fit
# AIC for the three response-variable combinations.
# NOTE(review): these fits use different response variables, so their
# likelihoods are not on a common scale -- confirm that comparing these
# AIC values directly is intended.
AIC(t2.fm1)
## [1] 3036.243
AIC(t2.fm2)
## [1] 13433.57
AIC(t2.fm3)
## [1] 15201.4
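Model 1 - winner and speed.ratio as response variables - has the lowest AIC. Note, however, that the three models have different response variables, so their log-likelihoods are computed on different data and their AIC values are not directly comparable; the gap probably reflects, at least in part, the much smaller spread of speed.ratio (fitted sd 0.28–0.52) compared with p.angle (fitted sd 2.3–6.8). We continue with winner and speed.ratio as the response variables.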
Response: winner, speed.ratio Covariates: oppo.speed, retser1, retser2, start.x, o.angle, oppo.hit.x, lag.p.angle, lag.speed.ratio, importance
library(depmixS4)
# t2.mod1 with `importance` added to the transition covariates; the
# responses are still `winner` and `speed.ratio`.
t3.mod1 <- depmix(
  list(winner ~ 1, speed.ratio ~ 1),
  data = fed_only,
  nstates = 2,
  family = list(multinomial("identity"), gaussian()),
  transition = ~ oppo.speed + retser1 + retser2 + start.x + o.angle +
    oppo.hit.x + lag.p.angle + lag.speed.ratio + importance
)
t3.fm1 <- fit(t3.mod1)
## iteration 0 logLik: -1703.828
## iteration 5 logLik: -1521.752
## iteration 10 logLik: -1504.257
## iteration 15 logLik: -1499.822
## iteration 20 logLik: -1497.061
## iteration 25 logLik: -1495.265
## iteration 30 logLik: -1494.044
## iteration 35 logLik: -1493.047
## iteration 40 logLik: -1491.994
## iteration 45 logLik: -1491.26
## iteration 50 logLik: -1491.013
## iteration 55 logLik: -1490.943
## iteration 60 logLik: -1490.92
## iteration 65 logLik: -1490.911
## iteration 70 logLik: -1490.907
## iteration 75 logLik: -1490.906
## iteration 80 logLik: -1490.906
## converged at iteration 82 with logLik: -1490.906
# State labels are arbitrary: in this fit St2 is the state with the higher
# winner probability (Re1.1 = 0.300 vs 0.047).
summary(t3.fm1)
## Initial state probabilties model
## pr1 pr2
## 1 0
##
## Transition model for state (component) 1
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio + importance
## Coefficients:
## St1 St2
## (Intercept) 0 1.47830418
## oppo.speed 0 0.01102213
## retser1 0 0.41316323
## retser2 0 -1.29973551
## start.x 0 0.08968091
## o.angle 0 -0.01530982
## oppo.hit.x 0 -0.15617371
## lag.p.angle 0 0.09854936
## lag.speed.ratio 0 -0.65599016
## importance 0 -6.36985682
## Probalities at zero values of the covariates.
## 0.1856837 0.8143163
##
## Transition model for state (component) 2
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio + importance
## Coefficients:
## St1 St2
## (Intercept) 0 -1.27170435
## oppo.speed 0 0.08070596
## retser1 0 -8.12713095
## retser2 0 -5.19748876
## start.x 0 0.21184025
## o.angle 0 0.01263041
## oppo.hit.x 0 0.11880283
## lag.p.angle 0 -0.47557395
## lag.speed.ratio 0 -0.05179039
## importance 0 2.67678766
## Probalities at zero values of the covariates.
## 0.7810344 0.2189656
##
##
## Response parameters
## Resp 1 : multinomial
## Resp 2 : gaussian
## Re1.0 Re1.1 Re2.(Intercept) Re2.sd
## St1 0.953 0.047 0.821 0.278
## St2 0.700 0.300 1.361 0.516
# Does adding `importance` help? t2.fm1 is the same model without it, so the
# two fits are nested and the likelihood-ratio test applies.
AIC(t2.fm1)
## [1] 3036.243
AIC(t3.fm1)
## [1] 3035.812
lrtest(t2.fm1,t3.fm1)
## Likelihood ratio test
##
## Model 1: t2.fm1
## Model 2: t3.fm1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 25 -1493.1
## 2 27 -1490.9 2 4.4313 0.1091
No significant change in the model from including importance (LR test p = 0.1091). Decision is to not include it.
Response: winner, speed.ratio Covariates: oppo.speed, retser1, retser2, start.x, o.angle, oppo.hit.x, lag.p.angle, lag.speed.ratio, points.diff
library(depmixS4)
# t2.mod1 with `points.diff` added to the transition covariates
# (`importance` is not included here).
t3.mod2 <- depmix(
  list(winner ~ 1, speed.ratio ~ 1),
  data = fed_only,
  nstates = 2,
  family = list(multinomial("identity"), gaussian()),
  transition = ~ oppo.speed + retser1 + retser2 + start.x + o.angle +
    oppo.hit.x + lag.p.angle + lag.speed.ratio + points.diff
)
t3.fm2 <- fit(t3.mod2)
## iteration 0 logLik: -1709.785
## iteration 5 logLik: -1523.751
## iteration 10 logLik: -1502.725
## iteration 15 logLik: -1496.437
## iteration 20 logLik: -1492.644
## iteration 25 logLik: -1490.809
## iteration 30 logLik: -1490.041
## iteration 35 logLik: -1489.684
## iteration 40 logLik: -1489.497
## iteration 45 logLik: -1489.393
## iteration 50 logLik: -1489.335
## iteration 55 logLik: -1489.299
## iteration 60 logLik: -1489.277
## iteration 65 logLik: -1489.264
## iteration 70 logLik: -1489.254
## iteration 75 logLik: -1489.247
## iteration 80 logLik: -1489.243
## iteration 85 logLik: -1489.24
## iteration 90 logLik: -1489.238
## iteration 95 logLik: -1489.237
## iteration 100 logLik: -1489.236
## iteration 105 logLik: -1489.236
## converged at iteration 109 with logLik: -1489.236
# State labels are arbitrary: in this fit St1 is the state with the higher
# winner probability (Re1.1 = 0.293 vs 0.046), so read the St1/St2
# transition coefficients accordingly.
summary(t3.fm2)
## Initial state probabilties model
## pr1 pr2
## 0 1
##
## Transition model for state (component) 1
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio + points.diff
## Coefficients:
## St1 St2
## (Intercept) 0 1.57384250
## oppo.speed 0 -0.07686864
## retser1 0 4.36359754
## retser2 0 10.64879971
## start.x 0 -0.15320218
## o.angle 0 0.03801197
## oppo.hit.x 0 -0.11040731
## lag.p.angle 0 0.31875107
## lag.speed.ratio 0 -0.07939454
## points.diff 0 -0.79666455
## Probalities at zero values of the covariates.
## 0.1716693 0.8283307
##
## Transition model for state (component) 2
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio + points.diff
## Coefficients:
## St1 St2
## (Intercept) 0 -1.359228594
## oppo.speed 0 -0.023153540
## retser1 0 -0.119639375
## retser2 0 1.543739033
## start.x 0 -0.134759979
## o.angle 0 0.009865329
## oppo.hit.x 0 0.146888083
## lag.p.angle 0 -0.095061334
## lag.speed.ratio 0 0.819799672
## points.diff 0 0.186987627
## Probalities at zero values of the covariates.
## 0.7956343 0.2043657
##
##
## Response parameters
## Resp 1 : multinomial
## Resp 2 : gaussian
## Re1.0 Re1.1 Re2.(Intercept) Re2.sd
## St1 0.707 0.293 1.340 0.518
## St2 0.954 0.046 0.819 0.276
# Judge points.diff against t2.fm1, the same model without it (nested).
# t3.fm1 (importance) and t3.fm2 (points.diff) have the same number of
# parameters and are not nested, so an LR test between them is not valid.
AIC(t2.fm1)
## [1] 3036.243
AIC(t3.fm2)
## [1] 3032.471
lrtest(t2.fm1, t3.fm2)
## Likelihood ratio test
##
## Model 1: t2.fm1
## Model 2: t3.fm2
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 25 -1493.1
## 2 27 -1489.2 2 7.772 0.02053 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
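Adding the point differential does improve the model: compared with the model without it (t2.fm1), AIC falls from 3036.243 to 3032.471, and the likelihood-ratio statistic 2 × (1493.122 − 1489.236) ≈ 7.77 on 2 degrees of freedom gives p ≈ 0.02. (t3.fm1 and t3.fm2 have the same number of parameters and are not nested, so a likelihood-ratio test between those two is not informative.) Let’s continue by adding game differential.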
Response: winner, speed.ratio Covariates: oppo.speed, retser1, retser2, start.x, o.angle, oppo.hit.x, lag.p.angle, lag.speed.ratio, points.diff, games.diff
library(depmixS4)
# t3.mod2 with `games.diff` added to the transition covariates.
t3.mod3 <- depmix(
  list(winner ~ 1, speed.ratio ~ 1),
  data = fed_only,
  nstates = 2,
  family = list(multinomial("identity"), gaussian()),
  transition = ~ oppo.speed + retser1 + retser2 + start.x + o.angle +
    oppo.hit.x + lag.p.angle + lag.speed.ratio + points.diff + games.diff
)
t3.fm3 <- fit(t3.mod3)
## iteration 0 logLik: -1722.634
## iteration 5 logLik: -1538.868
## iteration 10 logLik: -1503.423
## iteration 15 logLik: -1496.871
## iteration 20 logLik: -1493.091
## iteration 25 logLik: -1491.096
## iteration 30 logLik: -1490.119
## iteration 35 logLik: -1489.644
## iteration 40 logLik: -1489.391
## iteration 45 logLik: -1489.246
## iteration 50 logLik: -1489.162
## iteration 55 logLik: -1489.114
## iteration 60 logLik: -1489.087
## iteration 65 logLik: -1489.073
## iteration 70 logLik: -1489.067
## iteration 75 logLik: -1489.061
## iteration 80 logLik: -1489.059
## iteration 85 logLik: -1489.058
## iteration 90 logLik: -1489.057
## iteration 95 logLik: -1489.056
## converged at iteration 99 with logLik: -1489.056
# State labels are arbitrary: in this fit St2 is the state with the higher
# winner probability (Re1.1 = 0.293 vs 0.045).
summary(t3.fm3)
## Initial state probabilties model
## pr1 pr2
## 1 0
##
## Transition model for state (component) 1
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio + points.diff + games.diff
## Coefficients:
## St1 St2
## (Intercept) 0 1.36821652
## oppo.speed 0 0.02315394
## retser1 0 0.09808543
## retser2 0 -1.56450921
## start.x 0 0.13299003
## o.angle 0 -0.01021061
## oppo.hit.x 0 -0.14748827
## lag.p.angle 0 0.09540044
## lag.speed.ratio 0 -0.83117593
## points.diff 0 -0.19636798
## games.diff 0 0.01262987
## Probalities at zero values of the covariates.
## 0.2029081 0.7970919
##
## Transition model for state (component) 2
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio + points.diff + games.diff
## Coefficients:
## St1 St2
## (Intercept) 0 -1.63822453
## oppo.speed 0 0.07152043
## retser1 0 -4.01433215
## retser2 0 -10.76224794
## start.x 0 0.15545193
## o.angle 0 -0.04000780
## oppo.hit.x 0 0.12846462
## lag.p.angle 0 -0.31336115
## lag.speed.ratio 0 0.06464645
## points.diff 0 0.84075210
## games.diff 0 0.07914765
## Probalities at zero values of the covariates.
## 0.8372932 0.1627068
##
##
## Response parameters
## Resp 1 : multinomial
## Resp 2 : gaussian
## Re1.0 Re1.1 Re2.(Intercept) Re2.sd
## St1 0.955 0.045 0.819 0.275
## St2 0.707 0.293 1.336 0.519
# Compare model 2 with model 3 (which adds games.diff): AIC of each (lower is
# better), followed by a likelihood ratio test with the smaller model first.
AIC(t3.fm2)
## [1] 3032.471
AIC(t3.fm3)
## [1] 3036.113
lrtest(t3.fm2, t3.fm3)
## Likelihood ratio test
##
## Model 1: t3.fm2
## Model 2: t3.fm3
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 27 -1489.2
## 2 29 -1489.1 2 0.3583 0.836
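It appears that adding the games differential does not improve on model 2: the AIC rises from 3032.471 to 3036.113 and the likelihood ratio test is not significant (p = 0.836). Let’s try adding sets as well.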
Response: winner, speed.ratio Covariates: oppo.speed, retser1, retser2, start.x, o.angle, oppo.hit.x, lag.p.angle, lag.speed.ratio, points.diff, games.diff, sets.diff
library(depmixS4)

# Model 4: two-state HMM with responses `winner` (multinomial, identity link)
# and `speed.ratio` (gaussian). The transition model uses the model-3
# covariates plus sets.diff.
# NOTE(review): no set.seed() precedes fit(); if the EM algorithm starts from
# random values (depmixS4's default, to be confirmed), the state labels
# St1/St2 can differ from one run to the next.
t3.mod4 <- depmix(
  list(winner ~ 1, speed.ratio ~ 1),
  data = fed_only,
  nstates = 2,
  family = list(multinomial("identity"), gaussian()),
  transition = ~ oppo.speed + retser1 + retser2 + start.x + o.angle +
    oppo.hit.x + lag.p.angle + lag.speed.ratio + points.diff + games.diff +
    sets.diff
)
t3.fm4 <- fit(t3.mod4)
## iteration 0 logLik: -1720.316
## iteration 5 logLik: -1530.202
## iteration 10 logLik: -1498.063
## iteration 15 logLik: -1492.19
## iteration 20 logLik: -1488.826
## iteration 25 logLik: -1487.052
## iteration 30 logLik: -1486.145
## iteration 35 logLik: -1485.67
## iteration 40 logLik: -1485.402
## iteration 45 logLik: -1485.235
## iteration 50 logLik: -1485.119
## iteration 55 logLik: -1485.032
## iteration 60 logLik: -1484.963
## iteration 65 logLik: -1484.91
## iteration 70 logLik: -1484.871
## iteration 75 logLik: -1484.844
## iteration 80 logLik: -1484.827
## iteration 85 logLik: -1484.816
## iteration 90 logLik: -1484.81
## iteration 95 logLik: -1484.807
## iteration 100 logLik: -1484.805
## iteration 105 logLik: -1484.804
## iteration 110 logLik: -1484.803
## iteration 115 logLik: -1484.803
## converged at iteration 117 with logLik: -1484.803
# Print the fitted model 4: initial state probabilities, the transition
# coefficients for each state, and the response parameters per state.
summary(t3.fm4)
## Initial state probabilties model
## pr1 pr2
## 0 1
##
## Transition model for state (component) 1
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio + points.diff + games.diff +
## sets.diff
## Coefficients:
## St1 St2
## (Intercept) 0 1.47212213
## oppo.speed 0 -0.06946484
## retser1 0 3.66972743
## retser2 0 10.40321555
## start.x 0 -0.17703565
## o.angle 0 0.04351217
## oppo.hit.x 0 -0.13046840
## lag.p.angle 0 0.30879050
## lag.speed.ratio 0 -0.13299894
## points.diff 0 -0.95286114
## games.diff 0 -0.10036155
## sets.diff 0 -0.04300527
## Probalities at zero values of the covariates.
## 0.1866203 0.8133797
##
## Transition model for state (component) 2
## Model of type multinomial (mlogit), formula: ~oppo.speed + retser1 + retser2 + start.x + o.angle + oppo.hit.x +
## lag.p.angle + lag.speed.ratio + points.diff + games.diff +
## sets.diff
## Coefficients:
## St1 St2
## (Intercept) 0 -0.96932304
## oppo.speed 0 -0.01967138
## retser1 0 -0.03932336
## retser2 0 1.66331897
## start.x 0 -0.12143422
## o.angle 0 0.01195305
## oppo.hit.x 0 0.12667555
## lag.p.angle 0 -0.10249215
## lag.speed.ratio 0 0.98381059
## points.diff 0 0.19374641
## games.diff 0 -0.03277924
## sets.diff 0 -0.41343369
## Probalities at zero values of the covariates.
## 0.7249845 0.2750155
##
##
## Response parameters
## Resp 1 : multinomial
## Resp 2 : gaussian
## Re1.0 Re1.1 Re2.(Intercept) Re2.sd
## St1 0.710 0.290 1.326 0.521
## St2 0.956 0.044 0.819 0.275
# Compare model 3 with model 4 (model 3 plus sets.diff): AIC of each (lower is
# better), followed by a likelihood ratio test.
# The smaller model (t3.fm3, 29 parameters) is passed first, matching the
# other lrtest() calls in this analysis, so Df is reported as +2 rather than
# -2. The test statistic and p-value do not depend on the argument order.
AIC(t3.fm3)
## [1] 3036.113
AIC(t3.fm4)
## [1] 3031.606
lrtest(t3.fm3, t3.fm4)
## Likelihood ratio test
##
## Model 1: t3.fm3
## Model 2: t3.fm4
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 29 -1489.1
## 2 31 -1484.8 2 8.5064 0.01422 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare model 4 of this series with t2.fm1: AIC of each (lower is better),
# followed by a likelihood ratio test.
# The smaller model (t2.fm1, 25 parameters) is passed first, matching the
# other lrtest() calls in this analysis, so Df is reported as +6 rather than
# -6. The test statistic and p-value do not depend on the argument order.
# NOTE(review): the likelihood ratio test assumes t2.fm1 is nested in t3.fm4;
# confirm against the definition of t2.fm1.
AIC(t2.fm1)
## [1] 3036.243
AIC(t3.fm4)
## [1] 3031.606
lrtest(t2.fm1, t3.fm4)
## Likelihood ratio test
##
## Model 1: t2.fm1
## Model 2: t3.fm4
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 25 -1493.1
## 2 31 -1484.8 6 16.637 0.01071 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1